{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Code for Chapter 2 of Deep Learning with Structured Data\n",
    "This notebook contains the code snippets that accompany Chapter 2.\n",
    "\n",
    "Note that this code assumes the following files are in a directory called \"data\" that is a sibling of the directory containing this notebook file:\n",
    "- iriscaps.csv\n",
    "- ttc-streetcar-delay-data-2014.xlsx "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "raw path is C:\\personal\\manning\\deep_learning_for_structured_data\\notebooks\n"
     ]
    }
   ],
   "source": [
    "# get the directory for that this notebook is in\n",
    "import os\n",
    "rawpath = os.getcwd()\n",
    "print(\"raw path is\",rawpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "path is C:\\personal\\manning\\deep_learning_for_structured_data\\data\n"
     ]
    }
   ],
   "source": [
    "# data is in a directory called \"data\" that is a sibling to the directory containing the notebook\n",
    "# this code assumes you have copied to this directory all the XLS files from the source dataset: https://www.toronto.ca/city-government/data-research-maps/open-data/open-data-catalogue/#e8f359f0-2f47-3058-bf64-6ec488de52da\n",
    "path = os.path.abspath(os.path.join(rawpath, '..', 'data'))\n",
    "print(\"path is\", path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_length  sepal_width  petal_length  petal_width species\n",
       "0           5.1          3.5           1.4          0.2  setosa\n",
       "1           4.9          3.0           1.4          0.2  setosa\n",
       "2           4.7          3.2           1.3          0.2  setosa\n",
       "3           4.6          3.1           1.5          0.2  setosa\n",
       "4           5.0          3.6           1.4          0.2  setosa"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import pandas library\n",
    "import pandas as pd\n",
    "# raw github URL for Iris dataset\n",
    "url=\"https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/d546eaee765268bf2f487608c537c05e22e4b221/iris.csv\"\n",
    "# read the contents of the URL into a Pandas dataframe\n",
    "iris_dataframe=pd.read_csv(url)\n",
    "# display the first few rows from the new dataframe\n",
    "iris_dataframe.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "150"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_dataframe.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_length  sepal_width  petal_length  petal_width species\n",
       "0           5.1          3.5           1.4          0.2  setosa\n",
       "1           4.9          3.0           1.4          0.2  setosa\n",
       "2           4.7          3.2           1.3          0.2  setosa\n",
       "3           4.6          3.1           1.5          0.2  setosa\n",
       "4           5.0          3.6           1.4          0.2  setosa"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "setosa_frame = iris_dataframe[iris_dataframe[\"species\"] == 'setosa']\n",
    "setosa_frame.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_dataframe[iris_dataframe[\"species\"] == 'setosa'].shape[0] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_length  sepal_width  petal_length  petal_width species\n",
       "0           5.1          3.5           1.4          0.2  Setosa\n",
       "1           4.9          3.0           1.4          0.2  Setosa\n",
       "2           4.7          3.2           1.3          0.2  Setosa\n",
       "3           4.6          3.1           1.5          0.2  Setosa\n",
       "4           5.0          3.6           1.4          0.2  Setosa"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import pandas library\n",
    "import pandas as pd\n",
    "# define the base path and the file name - update the path value to the path for your data files\n",
    "file = \"iriscaps.csv\"\n",
    "# read the contents of the file into a Pandas dataframe\n",
    "iris_dataframe=pd.read_csv(os.path.join(path,file))\n",
    "# display the first few rows from the new dataframe\n",
    "iris_dataframe.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: xlrd in c:\\users\\ryanm\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (1.2.0)"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using pip version 19.0.3, however version 20.0.2 is available.\n",
      "You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "!pip install xlrd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sheet_name Feb 2014\n",
      "sheet_name Mar 2014\n",
      "sheet_name Apr 2014\n",
      "sheet_name May 2014\n",
      "sheet_name Jun 2014\n",
      "sheet_name July 2014\n",
      "sheet_name Aug 2014\n",
      "sheet_name Sept 2014\n",
      "sheet_name Oct 2014\n",
      "sheet_name Nov 2014\n",
      "sheet_name Dec 2014\n"
     ]
    }
   ],
   "source": [
    "# import pandas library\n",
    "import pandas as pd\n",
    "# define the base path and the file name\n",
    "file = \"ttc-streetcar-delay-data-2014.xlsx\"\n",
    "# load meta data about the Excel file\n",
    "xlsf = pd.ExcelFile(os.path.join(path,file))\n",
    "# load the first sheet of the Excel file into a dataframe\n",
    "df = pd.read_excel(os.path.join(path,file),sheet_name=xlsf.sheet_names[0])\n",
    "# iterate through the remaining sheets appending their contents onto the dataframe\n",
    "for sheet_name in xlsf.sheet_names[1:]:\n",
    "    print(\"sheet_name\",sheet_name)\n",
    "    # load the current sheet into a dataframe\n",
    "    data = pd.read_excel(os.path.join(path,file),sheet_name=sheet_name)\n",
    "    # append this dataframe onto the aggregated dataframe\n",
    "    df = df.append(data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11027, 10)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Report Date</th>\n",
       "      <th>Route</th>\n",
       "      <th>Time</th>\n",
       "      <th>Day</th>\n",
       "      <th>Location</th>\n",
       "      <th>Incident</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>Min Gap</th>\n",
       "      <th>Direction</th>\n",
       "      <th>Vehicle</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>505</td>\n",
       "      <td>06:31:00</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>Dundas and Roncesvalles</td>\n",
       "      <td>Late Leaving Garage</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4018.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>12:43:00</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>King and Shaw</td>\n",
       "      <td>Utilized Off Route</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4128.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>501</td>\n",
       "      <td>14:01:00</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>Kingston road and Bingham</td>\n",
       "      <td>Held By</td>\n",
       "      <td>13.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>W/B</td>\n",
       "      <td>4016.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>14:22:00</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>King St. and Roncesvalles Ave.</td>\n",
       "      <td>Investigation</td>\n",
       "      <td>7.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>W/B</td>\n",
       "      <td>4175.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>16:42:00</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>King and Bathurst</td>\n",
       "      <td>Utilized Off Route</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4080.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Report Date  Route      Time       Day                        Location  \\\n",
       "0  2014-01-02    505  06:31:00  Thursday         Dundas and Roncesvalles   \n",
       "1  2014-01-02    504  12:43:00  Thursday                   King and Shaw   \n",
       "2  2014-01-02    501  14:01:00  Thursday       Kingston road and Bingham   \n",
       "3  2014-01-02    504  14:22:00  Thursday  King St. and Roncesvalles Ave.   \n",
       "4  2014-01-02    504  16:42:00  Thursday               King and Bathurst   \n",
       "\n",
       "              Incident  Min Delay  Min Gap Direction  Vehicle  \n",
       "0  Late Leaving Garage        4.0      8.0       E/B   4018.0  \n",
       "1   Utilized Off Route       20.0     22.0       E/B   4128.0  \n",
       "2              Held By       13.0     19.0       W/B   4016.0  \n",
       "3        Investigation        7.0     11.0       W/B   4175.0  \n",
       "4   Utilized Off Route        3.0      6.0       E/B   4080.0  "
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Report Date</th>\n",
       "      <th>Route</th>\n",
       "      <th>Time</th>\n",
       "      <th>Day</th>\n",
       "      <th>Location</th>\n",
       "      <th>Incident</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>Min Gap</th>\n",
       "      <th>Direction</th>\n",
       "      <th>Vehicle</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>869</td>\n",
       "      <td>2014-12-31</td>\n",
       "      <td>509</td>\n",
       "      <td>22:30:00</td>\n",
       "      <td>Wednesday</td>\n",
       "      <td>Union Loop to Exhibition Loop</td>\n",
       "      <td>General Delay</td>\n",
       "      <td>10.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>B/W</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>870</td>\n",
       "      <td>2014-12-31</td>\n",
       "      <td>504</td>\n",
       "      <td>22:54:00</td>\n",
       "      <td>Wednesday</td>\n",
       "      <td>King and Dunn</td>\n",
       "      <td>Emergency Services</td>\n",
       "      <td>11.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4128.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>871</td>\n",
       "      <td>2014-12-31</td>\n",
       "      <td>505</td>\n",
       "      <td>23:00:00</td>\n",
       "      <td>Wednesday</td>\n",
       "      <td>Dundas West Station to Broadview Station</td>\n",
       "      <td>General Delay</td>\n",
       "      <td>10.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>B/W</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>872</td>\n",
       "      <td>2014-12-31</td>\n",
       "      <td>511</td>\n",
       "      <td>23:01:00</td>\n",
       "      <td>Wednesday</td>\n",
       "      <td>CNE</td>\n",
       "      <td>Mechanical</td>\n",
       "      <td>8.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>N/B</td>\n",
       "      <td>4160.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>873</td>\n",
       "      <td>2014-12-31</td>\n",
       "      <td>504</td>\n",
       "      <td>23:18:00</td>\n",
       "      <td>Wednesday</td>\n",
       "      <td>King and Bathurst</td>\n",
       "      <td>Mechanical</td>\n",
       "      <td>7.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4128.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Report Date  Route      Time        Day  \\\n",
       "869  2014-12-31    509  22:30:00  Wednesday   \n",
       "870  2014-12-31    504  22:54:00  Wednesday   \n",
       "871  2014-12-31    505  23:00:00  Wednesday   \n",
       "872  2014-12-31    511  23:01:00  Wednesday   \n",
       "873  2014-12-31    504  23:18:00  Wednesday   \n",
       "\n",
       "                                     Location            Incident  Min Delay  \\\n",
       "869             Union Loop to Exhibition Loop       General Delay       10.0   \n",
       "870                             King and Dunn  Emergency Services       11.0   \n",
       "871  Dundas West Station to Broadview Station       General Delay       10.0   \n",
       "872                                       CNE          Mechanical        8.0   \n",
       "873                         King and Bathurst          Mechanical        7.0   \n",
       "\n",
       "     Min Gap Direction  Vehicle  \n",
       "869     20.0       B/W      NaN  \n",
       "870     16.0       E/B   4128.0  \n",
       "871     12.0       B/W      NaN  \n",
       "872     16.0       N/B   4160.0  \n",
       "873     14.0       E/B   4128.0  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save a dataframe to your filesystem as a pickle file\n",
    "file = \"iris_dataframe.pkl\"\n",
    "iris_dataframe.to_pickle(os.path.join(path,file))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "      <th>species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_length  sepal_width  petal_length  petal_width species\n",
       "0           5.1          3.5           1.4          0.2  Setosa\n",
       "1           4.9          3.0           1.4          0.2  Setosa\n",
       "2           4.7          3.2           1.3          0.2  Setosa\n",
       "3           4.6          3.1           1.5          0.2  Setosa\n",
       "4           5.0          3.6           1.4          0.2  Setosa"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load a saved pickle file into a dataframe\n",
    "file = \"iris_dataframe.pkl\"\n",
    "iris_dataframe_from_pickle = pd.read_pickle(os.path.join(path,file))\n",
    "iris_dataframe_from_pickle.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
